Ce document vise à rassembler les commandes utiles pour programmer en R.


Départ d’analyse

Import données

  • JSON
library(jsonlite)
decp_Lambersart <- fromJSON(txt = "../data/decp/decp_acheteur.json", flatten = T)
decp_Lambersart <- as.data.frame(decp_Lambersart$marches) |> 
  mutate(titulaires = map(titulaires, ~ mutate(.x, id = as.character(id)))) |> 
  unnest(cols = c(titulaires))
  • XML
library(XML)
library(httr)
data_19 <- xmlParse(content(GET("https://marchespublics596280.fr/app.php/api/v1/donnees-essentielles/contrat/xml-extraire-criteres/50286/a:1:%7Bi:0;i:0;%7D/1/2019/false/false/false/false/false/false/false/false/false", user_agent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36")), "text"), 
                      encoding = "UTF-8")
xml_df_19 <- xmlToDataFrame(nodes = getNodeSet(data_19, "//marche")) |> mutate(Year = 2019)
  • SAS
library(haven)
data <- read_sas("../data/mon_fichier.sas7bdat")
  • Multiple CSV into folder (import and rbind)
library(data.table)
rbindlist_fread <- function(path, pattern = "*.csv") {
    files = list.files(path, pattern, full.names = TRUE)
    data.table::rbindlist(lapply(files, function(x) fread(x)))
}
data <- rbindlist_fread("mon/super/path")

# Autre technique
fichiers <- list.files(path = "./data/out/datas/", pattern = "liste_urls_valides_.*\\.csv", full.names = TRUE)
test <- lapply(fichiers, read_csv)
donnees_combinees <- do.call(rbind, test)
  • ZIP
library(utils)
download.file("lien/vers/zip.zip", "dossier_complet.zip")
unzip("dossier_complet.zip")
data <- read_delim("dossier_complet.csv", ";", trim_ws = TRUE)
  • Google sheets
library(googlesheets4) 
data <- read_sheet("lien/vers/le/google/sheets")
# Attention : ne marche que si le tableau est en format GoogleSheet et pas Excel déposé sur Drive !!
  • Geo JSON
library(geojsonR)
library(httr)
temp_file <- tempfile(fileext = ".geojson")
GET("https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/georef-france-commune/exports/geojson?lang=fr&refine=reg_name%3A%22Corse%22&facet=facet(name%3D%22reg_name%22%2C%20disjunctive%3Dtrue)&timezone=Europe%2FBerlin", write_disk(temp_file, overwrite = TRUE))
communes_contours_geo <- st_read(temp_file, quiet = TRUE)


API

  • Open Alex
parse_api_open_alex <- function(start, end){
    
    # Import des données : Works dataset, appels de l'API
    works_data <- purrr::map(
        .x = dois_bso[start:end,]$doi,
        .y = data.frame(matrix(ncol = 1, nrow = 1)),
        possibly(.f = ~fromJSON(txt = paste("https://api.openalex.org/works/mailto:diane@datactivist.coop/doi:", .x, sep = ""), flatten = T), otherwise = NA_character_),
        .default = NA)
    
    # Aplatissement
        # sélection des 2 variables qui nous intéressent
    works_df <- purrr::map(
        .x = works_data,
        .y = data.frame(matrix(ncol = 1, nrow = 1)),
        possibly(.f = ~unnest(data.frame(    # on récupère chaque élément/variable qui nous intéresse, on les met dans un df
                       doi = .x$doi, 
                       .x$authorships),
                   cols = "institutions", names_repair = "universal") |> select(doi, country_code), otherwise = NA_character_), 
        .default = NA)
        # suppression des NA et mise au format tabulaire
    works_df <- works_df[works_df !=  "NA"] # replace NA (DOIs non matchés avec OpenAlex) by NULL
    works_df <- rrapply(works_df, condition = Negate(is.null), how = "prune") #remove NULL
    works_df <- works_df |> bind_rows()
    
    # Export du df
    rio::export(works_df, glue("data/3.external/OpenAlex/french_CA/API_{start}_{end}.csv"))

}


### On applique la fonction pour 50 DOIs
parse_api_open_alex(1,50)


Web scraping

  • Easy scraping (data into table in 1 page)
library(rvest)
content <- read_html("url")
body_table <- content |> html_nodes('body')  |>
                    html_nodes('table') |>
                    html_table(dec = ",") 
data <- body_table[[1]]
  • Middle scraping (data into table in multiple pages)
library(rvest)
library(tidyverse)
data <- purrr::map(
        .x = (as.data.frame(rep(1:5, each = 1)) |> rename(page = `rep(1:5, each = 1)`))$page,
        .y = data.frame(matrix(ncol = 1, nrow = 1)),
        .f = ~read_html(paste0("http://portal.core.edu.au/conf-ranks/?search=&by=all&source=all&sort=atitle&page=", .x)) |> html_nodes('body')  |> html_nodes('table') |> html_table(dec = ","), 
        .default = NA)
data <- bind_rows(data)
  • Difficult scraping (data in the body text in multiple pages)
# Code valable en janvier 2023, site a évolué maintenant
library(htm2txt)
core_millesime <- purrr::map(
        .x = (as.data.frame(rep(1:10, each = 1)) |> rename(page = `rep(1:10, each = 1)`))$page,
        .y = data.frame(matrix(ncol = 1, nrow = 1)),
        possibly(.f = ~ as.data.frame(gettxt(paste0('http://portal.core.edu.au/conf-ranks/', .x, '/'))) |> #import page par page
    rename(text = 1) |> 
    mutate(text = strsplit(as.character(text), "\n")) |> unnest(text) |> #split les éléments séparés par des "\n"
    filter(row_number() ==  10 | #nom de conférence
               grepl("Acronym:", text) ==  TRUE | 
               grepl("Source:", text) ==  TRUE | 
               grepl("Rank:", text) ==  TRUE, #champs que l'on garde
           grepl("DBLP", text) ==  FALSE) |> #retrait de la ligne contenant ce string
    mutate(text = case_when(row_number() ==  1 ~ paste("Title:", text), TRUE ~ text), #ajout du préfixe "titre:"
           champ = str_extract(text, "^[a-zA-Z0-9_]*"), #dans une nouvelle colonne ce qui est avant ":"
           value = str_extract(text, "(?< = : )[^\n]*")) |> #dans une nouvelle colonne ce qui est après ": "
    select(-text) |> t() |> row_to_names(row_number = 1) |> data.frame() |> #transpose puis 1ère ligne en nom de colonnes
    pivot_longer(cols = -c(Title, Acronym), names_to = "number", values_to = "value", names_prefix = "Source|Rank") |> # format long pour rank et source quand multiples
    mutate(col = case_when(row_number() %% 2 ==  0 ~ "rank",
                           row_number() %% 2 ==  1 ~ "source")) |> #
    pivot_wider(names_from = col, values_from = value) |> select(-number) |> mutate(core_id = .x), otherwise = NA_character_),
        .default = NA)

# Gestion des Na et mise au format tabulaire
core_histo <- core_millesime[core_millesime !=  "NA"] # replace NA by NULL
core_histo <- rrapply::rrapply(core_histo, condition = Negate(is.null), how = "prune") #remove NULL
core_histo <- core_histo |> bind_rows()


Packages

  • Install and/or load multiple packages
packages = c("tidyverse", "jsonlite", "glue", "parallel", "doParallel", "foreach")
package.check <- lapply(
  packages,
  FUN = function(x) {
    if (!require(x, character.only = TRUE)) {
      install.packages(x, dependencies = TRUE)
      library(x, character.only = TRUE)
    }
  }
)


Traitement de variables


Nettoyage de données


Noms de colonnes

  • Clean column names
library(janitor)
data <- data |> clean_names() # retire majuscules, espaces et caractères spéciaux
  • Clean specific column names
library(janitor)
clean_some_names <- function(dat, idx, ...) {
  names(dat)[idx] <- janitor::make_clean_names(names(dat)[idx], ...)
  dat
}
data <- data |> 
    clean_some_names(14:18)
  • Rename all columns with suffix except two
data <- data |> rename_at(vars(-Name, -State), ~ paste0(., '_2017'))
  • Order alphabetically columns
data <- data |> select(indicateurs, order(colnames(data)))
  • Order categorical column based on other categorical col
# Dans le cas où les niveaux changent, ex : "3 à 5 fois par semaine (12 commerces)"
table |> 
  mutate(discu_nb_commerces = factor(discu_nb_commerces, 
                                     levels = table |> 
                                       distinct(`Avez-vous l'habitude de discuter avec vos clients en dehors du cadre strict de la vente ? (exemple : prendre des nouvelles personnelles)`, discu_nb_commerces) %>%
                                       arrange(factor(`Avez-vous l'habitude de discuter avec vos clients en dehors du cadre strict de la vente ? (exemple : prendre des nouvelles personnelles)`, 
                                                      levels = c("Jamais", "Très rarement (moins d'une fois par mois)", "Rarement (1 à 2 fois par mois)", "Ponctuellement (1 à 2 fois par semaine)", "Régulièrement (3 à 5 fois par semaine)", "Très régulièrement (plusieurs fois par jour)"))) |> 
                                       pull(discu_nb_commerces)))


NAs

  • Fill NAs with previous values by group (repeat existing values)
data <- data |> 
    group_by(`Référence commande`) |> 
    fill(everything(), .direction = "downup") |> 
    ungroup()
  • Replace NA
# NAs d'une colonne par 0
data <- data |> mutate(col = replace_na(col, 0))

# NAs du df entier par 0
data <- data |> mutate_all(replace_na, 0)

# NAs par string
data <- data |> replace(is.na(.), "unknown")

# NAs par valeurs autre colonne
data <- data |> mutate(col_NA = coalesce(col_NA, col_replace))
  • Replace by NA
# Cellules vides par NAs
data <- data |> mutate_all(na_if, "")

# NULL par NAs
data <- data |> replace(. == "NULL", NA) 
data[data ==  "null"] <- NA

# Chiffres négatifs par NAs
data <- data |> mutate(col = replace(col, which(col<0), NA))

# Valeur par NAs sur certaines colonnes
data <- data |> mutate(across(starts_with("Choix_"), ~ na_if(.x, "Pas de préférence")))


Variables numériques

  • Round multiple columns
data <- data |> mutate_at(vars(var_3:var_17), ~round(.,0))
  • Round values to nearest 5
plyr::round_any(x, 5)
  • Shaping numeric variables
# Nombres arrondis au million
format(round(100000000 / 1e6, 1), trim = TRUE)

# Centaines et milliers séparés des virgules [comma]
format(as.integer(1000000, 0), nsmall = 1, big.mark = ".")
  • “Total” row per group
library(janitor)
data <- data |> 
    group_by(Var2) |> 
    group_modify(~ adorn_totals(.x, where = "row")) |> 
    ungroup() |> 
    arrange(fct_relevel(Commune, 'Total')) #puis trier avec ligne "Total" en haut puis ordre alphabétique sur colonne "Commune"

# Solving error "trying to re-add a totals dimension that is already been added"
data |> 
    untabyl() |> #ajouter avant le total
    adorn_totals() 


Indices, rep(), seq()

  • Index rep
c(rep(1:5570, each = 50), rep(5571, each = 7))
  • Between groups index (not within)
data <- data |> 
    group_by(defi_profil) |> 
    mutate(groupNbr = cur_group_id())


Dates

  • Change date format
data <- mutate(date = format(as.Date(Date, format = "%Y-%m-%d %H:%M:%S"),"%d %B %Y")) #30 mai 2023
# autres formats : https://www.r-bloggers.com/2013/08/date-formats-in-r/
  • Dates subtraction
data <- data |> mutate(nb_weeks = round(as.numeric(difftime(fin, Sys.Date(), units = "weeks")), 0), #semaine
                       nb_month = round(as.numeric(difftime(fin, Sys.Date(), units = "weeks") /4.34524), 0)) #mois
  • Dates sum
library(mondate)
data <- data |> mutate(date_fin = as.mondate(date_debut) + duree) #duree en mois


Âge

  • Check intersections between intervals
data <- data |> 
    mutate(has_intersection = all(sapply(1:(n() - 1), function(i) {
      all(pmax(min[i], min[(i + 1):n()]) <= pmin(max[i]+3, max[(i + 1):n()]+3)) 
    })), .by = author_id) #où min est âge_min et max est age_max de la tranche
  • Create age intervals
data <- data |> mutate(tranche_age = cut(age, c(18,20, seq(30, 90, 5), 98)))
  • Find age from date of birth
data <- data |> mutate(age = round(as.numeric(difftime(Sys.Date(), dateOfBirth, units = "weeks")) / 52.1429, 0)) #année


Chaînes de caractères

  • Replace string
# Valeurs d'une colonne
data <- data |> mutate(col = str_replace_all(col, c("pattern1 | pattern2" = "replacement")))

# Attention à escape les parenthèses pour que le remplacement fonctionne
data <- data |> mutate(col = str_replace_all(col, c("string avec \\(parenthèses\\)" = "replacement")))

# Valeurs du df entier
data <- data |> mutate_all(function(x) gsub("pattern1 | pattern2", "replacement", x))
  • Replace character of multiple columns
data |> mutate_at(vars(January:December), ~str_replace(., ",", "."))
  • Upper string and remove special characters
data <- data |> mutate(col = stringi::stri_trans_general(str = gsub("-", " ", toupper(string)), id = "Latin-ASCII"))
  • Upper first letter of both words
library(tools)
toTitleCase(tolower("MY STRING"))
toTitleCase("my other string")
  • Upper first letter of first word
gsub("^(\\w)(\\w+)", "\\U\\1\\L\\2", "my other string", perl = TRUE)
  • Remove specific words/characters
data <- data |> mutate(col = removeWords(string, c("IEEE ", "ACM ", "SIAM ")))
  • Remove duplicated words
rem_dup_word <- function(x){
  #x <- tolower(x)
  paste(unique(trimws(unlist(strsplit(x, split = " ", fixed = F, perl = T)))), collapse = " ")
}
rem_dup_word(x)
  • Remove isolated letters/characters
data <- data |> mutate(col = gsub("\\W*\\b\\w\\b\\W*", " ", string))
  • Remove blank spaces at the begining (or the end)
data <- data |> mutate(col = trimws(string, which = "left"))
  • Remove blank spaces at the begining and at the end
data <- data |> mutate(col = str_squish(col)) #specific column
data <- data |> mutate_all(~str_squish(.)) #all character columns
  • Remove first digit if 0
data <- data |> mutate(num = gsub("^0", "", num))
  • Remove special characters (all)
# Supprimer les caractères spéciaux ex : ? ' !
data <- data |> mutate(col = str_replace_all(col, "[^[:alnum:]]", " "))
  • Keep first word
data <- data |> mutate(first_word = word(string, 1))
  • Extract digits
library(strex)
data <- data |> mutate(min = str_nth_number(string, n = 1)) # extrait le 1er chiffre du string
  • Extract year
data <- data |> mutate(annee = str_extract(`En quelle année ?`, "(1|2)\\d{3}")) #seulement "\\d{5}" pour zipCode
  • Extract n first characters
data <- data |> mutate(sub_string = substr(string, 1, n))
  • Extract n last characters
data <- data |> mutate(sub_string = substr(string, nchar(string)-n+1, nchar(string)))
  • Add digit identifiant
data <- data |> mutate(num = sprintf("%02d", num)) #passer de 1 à 2 digits
data <- data |> mutate(num = str_pad(num, 14, pad = "0")) #obliger à avoir 14 caractères (donc ajoute 0 en début si besoin)
  • Only 1 character in a string
nchar(string) ==  1


Regular expressions (regex)

  • Detect digits
str_detect(string, "[0-9]") ==  TRUE
grep("\\d+", string, value = TRUE) 
  • Detect special characters (i.e. no letter nor digit)
grepl('[^[:alnum:]]', string)
  • Detect dates with format “%Y-%m-%d %H:%M:%S”
grepl("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", string)
  • Extract characters before first digit
str_extract(string, "^\\D+")
  • Extract characters before “:”
str_extract(string, "^[a-zA-Z0-9_]*")
  • Extract characters after “:”
str_extract(string, "(?<=: )[^\n]*")
  • Extract characters after “[”
str_extract(string, "(?<=\\[).*")
substr(Question, 1, nchar(Question)-1) #remove last character
  • Extract characters before “[”
str_extract("string bla [da]", "^.*\\[") #crochet inclu dans l'extraction
  • Extract characters before “(”
str_extract(string, "^[^(]+")
  • Extract characters before “,”
str_extract(string, "^[^,]+")
  • Extract characters between parenthesis
str_extract(string, "\\(.*?\\)")
  • Remove characters between parenthesis
str_remove(string,"\\([^)]*\\)")
  • Extract characters after blankspace
str_extract(string, "\\s+(.*)")
  • Extract all numbers and “+” character, and collapse into 1 cell
sapply(str_extract_all("+3214FSEtest!! 1", "[\\d+]+"), function(x) paste(x,collapse=""))
  • Replace misled zip code
# Reformater code postal mal renseigné (avec des espaces entre les 5 digits)
# Exemple : "49, avenue Du grau du rieu Marseillan Occitanie 34 340 France"
mutate(adresses = str_replace_all(adresses, "\\b(\\d{2}) (\\d{3})\\b", "\\1\\2"))
  • Extract domain name in URL
# Nom de domaine
mutate(domaine_url = str_extract(url, "(?<=^https?://)[^/]+"))
# Nom de domaine niveau 2 (jusqu'au 2è slash)
mutate(domaine_url2 = str_extract(url, "(?<=^https?://)[^/]+/[^/]+"))


Listes

  • Unlist
data <- data |>
    pull(column) |> pluck() |> bind_rows() |> 
    group_by(author_id) |> mutate(n = n()) |> select(author_id, n) |> distinct()
  • Complex unlist when simple pull/pluck is not working
# Exemple
comm1 <- data |> 
    filter(lengths(comments) != 0) |> 
    group_by(id) |> 
    mutate(nb_comments = nrow(comments[[1]])) |> 
    select(id2, id, comments, nb_comments) |> 
    pluck() |> bind_rows()
comm2 <- comm1 |>
    pull(comments) |> 
    pluck() |> bind_rows()
comm1.1 <- comm1 |> ungroup() |> 
    mutate(index = row_number()) |> 
    group_by(id) |> 
    slice(rep(1:n(), each = nb_comments)) |> 
    arrange(index)
proj_comm_date <- cbind(comm1.1, comm2) |> ungroup() |> 
    select(id2, author_id, publishedAt) |> 
    rename(date = publishedAt) |> mutate(type = "commentaires")
  • Unnest (Can’t combine x[[1]] and x[[5323]] )
data |> 
    mutate(col = lapply(col, as.character)) |> #mettre tout en caractères pour ne plus avoir l'erreur
    unnest(cols = col, keep_empty = TRUE)
  • List to characters separated with comma for column in a dataframe
data |> 
    mutate(ma_col = sapply(ma_col, function(x) paste(unlist(x), collapse = ", ")))
  • All list columns to characters
data |> 
    mutate(across(where(is.list), ~ sapply(.x, function(y) paste(unlist(y), collapse = ", "))))


Filtres

  • Remove values containing string
data <- data |> filter(!grepl(',', column)) #containing comma
  • Keep values containing string
data <- data |> filter(grepl("mot particulier", column) ==  TRUE)
data <- data |> filter_all(all_vars(grepl("mot", .)))
  • Even and odd lines
data |> filter(row_number() %% 2 ==  0) # pair
data |> filter(row_number() %% 2 ==  1) # impair
  • All columns equal a value
data |> group_by(cat) |> filter(across(where(is.character), ~. != "N/A"))
  • At least 2 values by group
data |> 
    filter(any(projet %in% c("proj1234", "proj4321")), 
           .by = id)
  • Filter on first and second minimum values by group
data |> 
    filter((degre_etude == min(degre_etude) | degre_etude == min(degre_etude[degre_etude != min(degre_etude)])) |
           (degre_etude == min(degre_etude) | is.na(degre_etude)), 
         .by = id)


Select

  • Possibly select columns based on other df’s columns
data |> 
    select(any_of(names(raw_data)))


Traitement de dataframes


Formats

  • Pivot longer
m3 <- data |> select(c(BATIMENTS:TYPE_DE_BATIMENTS, starts_with("m3"))) |> 
    mutate_all(as.character) |> 
    pivot_longer(cols = -c(BATIMENTS:TYPE_DE_BATIMENTS), names_to = "Annee", values_to = "m3", names_prefix = "m3_")
montant <- data |> select(c(BATIMENTS:TYPE_DE_BATIMENTS, starts_with("montant"))) |> 
    mutate_all(as.character) |> 
    pivot_longer(cols = -c(BATIMENTS:TYPE_DE_BATIMENTS), names_to = "Annee", values_to = "montant", names_prefix = "montant_")
final <- cbind(m3, montant |> select(montant))
  • Pivot wider
data <- data |> 
    pivot_wider(names_from = choix, values_from = nb_interesses, names_prefix = "choix_")
  • Split comma separated values
# Split 
data <- data |> mutate(journal_issns = strsplit(as.character(journal_issns), ",")) |> unnest(journal_issns)
# Unsplit
data <- data |> mutate(journal_issns = paste0(unique(na.omit(journal_issns)), collapse = ","))
  • Repeat row based on 1 column number
data |> uncount(x)
  • Repeat row based on several column number
data.frame(col1 = c(0, 167, 73),
           col2 = c(62, 0, 73)) |> 
    slice(rep(1:n(), times = c(col2[1], col1[2], col2[3])))
  • Random values with specific probability
random <- c("groupe 1", "groupe 2", "groupe 3")
sample(random, size = nrow(data), replace = TRUE, prob = c(1/2,3/5,2/5))


Merge, join

  • Merge 2 or more columns into 1
data <- data |> mutate(new_col = coalesce(col1,col2,col3))
  • Fuzzy join
library(fuzzyjoin)
data <- stringdist_left_join(data, data2, by = "col_name", max_dist = 5, distance_col = "distance") |> 
  group_by(nom) |> slice_min(distance)
  • Merge 2 dataframes ~rbind()
data_merged <- merge(df_1, df_2, all = TRUE) 
  • Alternative rbind() when different number of columns
data <- list(cat1, cat2) |>  bind_rows(.id = 'origine_df')
  • Differences 2 dataframes
anti_join(df1, df2)
  • Similarities 2 dataframes
semi_join(df1, df2)

Autre

  • Redundancy test
identical(data$id1, data$id2)
  • Increase total system memory R session (RAM)
library(unix) #pour linux
rlimit_as(1e20)  #increases to ~12GB
  • Sample random rows of a df
df[sample(nrow(df), 3), ] #pour récupérer 3 lignes


Analyse de données


Opérations, statistiques

  • NAs per column of a dataframe
# Une colonne
data |> count(is.na(col_name))

# Toute les colonnes
nb_NA <- as.data.frame(apply(is.na(data), 2, sum)) |> 
                       rename(`nombre de NA` = `apply(is.na(data), 2, sum)`) |>
                       mutate(pourcentage = `nombre de NA`/nrow(data)*100) |> 
                       mutate(pourcentage = round(pourcentage, 2)) |> 
                       arrange(desc(pourcentage)) |> 
    rownames_to_column() |> 
    rename(variable = rowname)
  • Non NAs per group
table <- data |> 
    summarise_all(list(~sum(!is.na(.))), .by = group)
  • Values frequency
data <- as.data.frame(table(data$column))  #R base
data <- data |> group_by(group) |> count(column)   #dplyr
data <- data |> summarise(n = n(), .by = group) #dplyr
  • Sum in pipe for specific rows
data |> mutate(new_cat = sum(n[Catégorie2 == "Total"])) #[]
  • Calculate sum accross mutliple columns df
data |> rowwise() |> mutate(sum_multiple = sum(c_across(var_3:var_17)))
  • Calculate difference between rows by group
data |> group_by(Structure) |> mutate(ecart = Pourcentage - lag(Pourcentage))
  • Weighted mean and median
# Fonction pour calculer une médiane pondérée
weighted_median <- function(x, w) {
  df <- data.frame(x = x, w = w) |>
    arrange(x)
  cum_w <- cumsum(df$w)
  cut_point <- sum(df$w) / 2
  median <- df$x[which(cum_w >= cut_point)[1]]
  return(median)
}

# Calculs pondérés
library(stats)
library(questionr)
data |> mutate(n_moyen_stats = round(weighted.mean(n_repondants, PONDFIN_logit), 2), #package stats
               n_moyen_questionr = round(questionr::wtd.mean(n_repondants, PONDFIN_logit), 2), #package questionr
               n_moyen_manuel = round(sum(n_repondants * PONDFIN_logit) / sum(PONDFIN_logit), 2), #calcul manuel
               n_median_pondere = weighted_median(n_repondants, PONDFIN_logit)) #médiane
  • Geocode and reverse geocode
# Geocoder (obtenir longitude et latitude à partir du code postal)
data_geoloc <- data |> 
    select(zip_code) |> 
    mutate(pays = "France") |> 
    na.omit() |> 
    geocode(postalcode = zip_code, country = pays, method = 'osm', lat = latitude , long = longitude)

# Reverse geocoder (obtenir l'adresse à partir de longitude / latitude)
data <- data_geoloc |> 
  reverse_geocode(lat = latitude, long = longitude, method = 'osm', full_results = TRUE)


Dataviz

  • Reorder values before plotting
# geom_bar ordre alphabetic, après arrange()
mutate(colonne = factor(colonne, levels = rev(unique(colonne)))) 
# geom_bar décroissant selon n
mutate(colonne = fct_reorder(colonne, n))
  • Arrange multiple plots
# Afficher plusieurs ggplots
library(gridExtra)
grid.arrange(g1,g2,g3, ncol = 3, nrow = 1, 
             top = grid::textGrob("Titre", gp = grid::gpar(fontsize = 15, font = 2)))

# Aligner les boxes
library(cowplot)
plot_grid(p3.1, p3.2, p3.3, p3.4, p3.5, align = 'vh')

# Afficher plusieurs ggplotlys
library(plotly)
subplot(plotly_positif, plotly_negatif, nrows = 1)

# Pas de message grid.arrange() dans rmd
graph <- grid.arrange(g1,g2)
grid::grid.draw(graph)
  • Interactive graph
# Passer en plotly
ggplotly(graph, tooltip = c("text")) |> 
    layout(xaxis = list(autorange = TRUE), yaxis = list(autorange = TRUE)) #auto adjust scale when click on element

# Passer en giraph : mettre les geométries en interactif !! ex: geom_segment_interactive()
graphc <- ggplot(data, aes(x = Réponses, y = Pourcentage, fill = Edition,
                              tooltip = paste0(Pourcentage*100, "% en ", Edition))) +#texte au survol
    geom_point_interactive() + 
    theme_minimal()
girafe(print(graph), width_svg = 15, height_svg = 12)
  • Highlight bar on stacked barplot with alpha
data |> 
  ggplot(aes(y = n)) +
  geom_col(aes(x = cycle, fill = rowname, alpha = cycle != "Catégorie"), color = "white", position = "stack", width = 0.7) +
  scale_alpha_manual(values=c(1, .4))
  • Highlight several bars on barplot with alpha
data_stat |> 
    mutate(a_surligner = case_when(type == "votes" ~ "1", type == "questionnaires"~ "1", .default = "0")) |> ungroup() |> 
    type_contrib("Les jeunes plébiscitent les outils de consultation vs. les outils de débat", "Fig. 31", 60, "") +
    geom_bar(aes(y=n, x=type, fill = a_surligner), position="dodge", stat="identity", width=.6) +
    geom_bar(aes(y=n_ref, x=type, linetype = "proportions de l'échantillon global"), 
             position="dodge", stat="identity", width=.6, color = "#666666", fill = NA, size = 1) +
    geom_label(aes(x = type, y = n+60, label = ecart), size = 5, fill = "white", label.size = NA) +
    scale_fill_manual(values = c("1" = "#83b4d1", "0" = "#cde1ec")) +
    guides(fill = "none")


Par élément

Geometry

# Geométrie initiale
  geom_line(size = 1.7, alpha = 0.9, linetype = 1, color = "#0066CC") +
  geom_point(colour = "#0066CC", fill = "#0066CC", size = 2, pch = 21, stroke = 1.5) +
  geom_bar(position = position_dodge(.9), stat = "identity", width = .8, fill = "#2B73B4") + #.9 et width pour barres pas collées
  geom_bar(aes(x = forcats::fct_infreq(adequation))) + #fct_infreq pour ordonner selon count
  geom_col(position = "stack", width = 0.7, color = "white") +  coord_flip() + #cas particulier de geom_bar où on prend n comme Y et non count
  geom_text_wordcloud(family = "Montserrat") +
    
# Géométrie additionnelle
  geom_text(aes(y = 1, label = title_projet), hjust = "bottom", #aligner geom_text à gauche avec coord_flip
            fontface = "italic", size = 5, hjust = 0, lineheight = 0.8) + #lineheight pour régler l'interligne quand label sur plusieurs lignes
  geom_label(aes(y = 1, label = title_projet), hjust = "bottom", fontface = "italic", size = 2.6,
             fill = "white", label.size = NA, hjust = 0) + #white background and remove black borders à la fin sinon marche pas !
  geom_label(aes(y = 1, label = title_projet), hjust = "bottom", fontface = "italic", size = 2.6,
             fill = "white", label.size = NA, position = position_dodge(width = .9), hjust = 0) + #pour double barres plot (dodge)
  stat_count(geom = "text", colour = "white", size = 4,
             aes(label = ..count.., y = ..count..+.7), #y pour positionnement juste au dessus des barres
             position = position_stack(vjust = 0.5)) + #geom_text des geom_bar sans y
  geom_vline(xintercept = -.5, linetype = 2, color = "#0066CC") +
    
# Annotate a graph
coord_fixed(clip = 'off') # geom_label() déborde du graph

Scales

  xlim(1, 100) +
  scale_y_continuous(labels = scales::comma) + #grands chiffres lisibles
  scale_y_continuous(breaks = scales::pretty_breaks()) + #breaks réguliers, plus lisible (pas d'axe)
  scale_y_continuous(labels = scales::percent, limits = c(0,1)) + # pourcentages
  scale_y_discrete(limits = 1:12) + #valeurs discrètes
  scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 50)) + #axis-text trop longs sur plusieurs lignes
  scale_color_continuous(high = "#132B43", low = "#56B1F7") #reverse color
  scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10), limits = c(-.2, 10)) # de 0 à 10 avec les breaks spécifiés
  scale_fill_manual(values = c("Non, mais c’est prévu dans les 12 prochains mois" = "#e1b44d", 
                             "Oui, au cours des deux dernières années" = "#323465", 
                             "Oui" = "#33bbc9"),
                  labels = c("Non, mais c’est prévu dans les 12 prochains mois" = "Prévue dans les 12 prochains mois", 
                             "Oui, au cours des deux dernières années" = "Engagée au cours des deux dernières années", 
                             "Oui" = "Engagée"), #rename categories legend
                  breaks = c("Oui", "Oui, au cours des deux dernières années", "Non, mais c’est prévu dans les 12 prochains mois")) #order items legend

Labs

# titres trop longs, automatiquement coupés
title = stringr::str_wrap("Exemple de titre très très très très très très très très très très très très long", width = 45)

# titres avec des mots colorés
library(ggtext)
ggplot(data) +
    geom_point() +
    labs(title = "Dans mon titre je veux mettre en avant <span style='color: #323465; font-size: 23pt;'>cette catégorie</span> par rapport aux autres") +
    theme(plot.title = element_markdown()) #coloré et taille plus grande

Theme

  theme_classic() +
  theme(panel.grid.major.y = ggplot2::element_line(color = "#cbcbcb"), #lignes horizontales fond graphique en gris (BBC thème)
        strip.text.x = element_text(face = "bold"), #label des facettes
        axis.title.x = element_text(margin = margin(t = 5, r = 0, b = 5, l = 0)), #augmenter marges entre texte et labels des axes
        plot.title = element_textbox_simple(hjust = 1), #hjust: titre aligné à droite, element_textbox_simple line break auto titre
        plot.background = element_rect(fill = "#FAF3EE", colour = "#FAF3EE"), #background couleur Datactivist
        panel.background = element_rect(fill = "#FAF3EE", colour = "#FAF3EE"),
        legend.background = element_rect(fill = "#FAF3EE", colour = "#FAF3EE"),
        legend.box = "vertical", legend.box.just = "left", #multiple guide_legend each one on new row, for top legend 
        ) +
  • theme_custom() BBC
font <- "Helvetica"
theme_custom <- function (){
    font <- "Helvetica"
    ggplot2::theme(plot.title = ggplot2::element_text(family = font,size = 21, face = "bold", color = "#222222"), 
        plot.subtitle = ggplot2::element_text(family = font,size = 18, face = "italic", margin = ggplot2::margin(0, 0, 9, 0)), 
        plot.caption = ggplot2::element_text(family = font,size = 18, margin = ggplot2::margin(9, 0, 9, 0)), 
        plot.title.position = "plot",
        plot.caption.position = "plot",
        legend.title = ggplot2::element_text(family = font, size = 18, color = "#222222"), 
        legend.position = "top", 
        legend.text.align = 0, 
        legend.background = ggplot2::element_blank(),
        legend.key = ggplot2::element_blank(),
        legend.text = ggplot2::element_text(family = font, size = 18,color = "#222222"), 
        axis.text = ggplot2::element_text(family = font, size = 15,color = "#222222"), 
        axis.text.x = ggplot2::element_text(margin = ggplot2::margin(5,b = 10)), 
        axis.title = ggplot2::element_text(family = font, size = 18,color = "#222222"),
        axis.ticks = ggplot2::element_blank(),
        axis.line = ggplot2::element_blank(), 
        panel.grid.minor = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_line(color = "#cbcbcb"),
        panel.grid.major.x = ggplot2::element_blank(), 
        panel.background = ggplot2::element_blank(),
        strip.background = ggplot2::element_rect(fill = "white"),
        strip.text = ggplot2::element_text(size = 22, hjust = 0, face = "bold"))
}
theme_custom_largeG <- function (){
    font <- "Helvetica"
    ggplot2::theme(plot.title = ggplot2::element_text(family = font,size = 25, face = "bold", color = "#222222"), 
        plot.subtitle = ggplot2::element_text(family = font,size = 18, face = "italic", margin = ggplot2::margin(0, 0, 9, 0)), 
        plot.caption = ggplot2::element_text(family = font,size = 18, margin = ggplot2::margin(9, 0, 9, 0)), 
        plot.title.position = "plot", #titre commence où y-axis commencent !!
        plot.caption.position = "plot",
        legend.title = ggplot2::element_text(family = font, size = 18, color = "#222222"), 
        legend.position = "top", 
        legend.text.align = 0, 
        legend.background = ggplot2::element_blank(),
        legend.key = ggplot2::element_blank(),
        legend.text = ggplot2::element_text(family = font, size = 18,color = "#222222"), 
        axis.text = ggplot2::element_text(family = font, size = 24,color = "#222222"), 
        axis.text.x = ggplot2::element_text(margin = ggplot2::margin(5,b = 10)), 
        axis.title = ggplot2::element_text(family = font, size = 27,color = "#222222"),
        axis.ticks = ggplot2::element_blank(),
        axis.line = ggplot2::element_blank(), 
        panel.grid.minor = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_line(color = "#cbcbcb"),
        panel.grid.major.x = ggplot2::element_blank(), 
        panel.background = ggplot2::element_blank(),
        strip.background = ggplot2::element_rect(fill = "white"),
        strip.text = ggplot2::element_text(size = 22, hjust = 0, face = "bold"))
}
# change grid when coord_flip()
theme(panel.grid.major.x = ggplot2::element_line(color = "#cbcbcb"),
      panel.grid.major.y = ggplot2::element_blank())

Facet

  facet_grid(Projet ~ ., 
             scales = "free", #scales = "free" pour label différents d'une facette à une autre
             space = "free") + #space = "free" pour hauteur différentes selon le nombre d'éléments par facette
  facet_zoom(x = annee > 2014, split = TRUE) +
  ggforce::facet_col(facets = vars(Projet), 
                     scales = "free_y", 
                     space = "free") # pour avoir scales et face de facet_grid avec labels to the top de facet_wrap
  facet_wrap(~vote, scales='free_x') + scale_y_continuous(limits=c(0 ,50)) # pour avoir les ticks sur chaque facettes et pas juste celles du bas

Legend

  guides(fill = guide_legend(nrow = 6, byrow = TRUE,  # nombre d'éléments par ligne
                             title = "titre légende"), # titre légende
        lwd = "none",  #ne pas afficher une légende en particulier
        col = guide_legend(title = "", reverse = TRUE, override.aes = list(lwd = 2))) + #lwd = 2 pour ligne plus épaisse et plus visible dans la légende

# Deux légendes sur un même graph, affichées l'une sous l'autre
theme(legend.box = "vertical", legend.box.just = "left")

Colors

scale_fill_manual(values = c("#c898ae", "#da4729", "#f38337", "#74a466", "#fecf5d", "#5E79AC")) #couleurs Bauhaus


Graphiques

Donut / pie chart

  • Donut
# Data pour le graphique
data_graph <- data.frame(Categorie = c("AA", "BB", "CC"),
                         Valeur = c(40, 40, 20)) |> 
    mutate(percent = round(Valeur / sum(Valeur) * 100, 0))

# Dataviz
ggplot(data_graph, aes(x = 2, y = Valeur, fill = Categorie)) +
  geom_col(col = "white", linewidth = 2) +
  geom_text(aes(label = paste0(percent, "%"), color = Categorie),
            position = position_stack(vjust = 0.5)) +
  geom_text(aes(x = 0.2, y = 0, label = sum(Valeur)), col = "#333333", alpha = 0.8, size = 8, fontface = "bold") +
  coord_polar(theta = "y") +
  scale_fill_manual(values = c('#fecf5d', '#2B73B4','#82888d')) +
  scale_color_manual(values = c("AA" = "black", "BB" = "white", "CC" = "white")) +
  xlim(c(0.2, 2 + 0.5)) +
  labs(title = "Répartition des catégories selon les espèces") +
  guides(fill = guide_legend(title = "Catégories"),
         col = "none") +
  theme(panel.background = element_rect(fill = "white"),
        panel.grid = element_blank(),
        axis.title = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        legend.position = "top",
        legend.title = element_text(size = 16,color = "#222222"), 
        legend.text = element_text(size = 13,color = "#222222"), 
        plot.title.position = "plot",
        plot.title = element_text(size = 18, face = "bold", color = "#222222"))

  • Pie chart
# Dataviz
ggplot(data_graph, aes(x = 0, y = Valeur, fill = Categorie)) +
  geom_col(col = "white", linewidth = .6) +
  geom_text(aes(label = paste0(percent, "%"), color = Categorie),
            position = position_stack(vjust = 0.5)) +
  coord_polar(theta = "y") +
  scale_fill_manual(values = c('#fecf5d', '#2B73B4','#82888d')) +
  scale_color_manual(values = c("AA" = "black", "BB" = "white", "CC" = "white")) +
  labs(title = "Répartition des catégories selon les espèces") +
  guides(fill = guide_legend(title = "Catégories"),
         col = "none") +
  theme(panel.background = element_rect(fill = "white"),
        panel.grid = element_blank(),
        axis.title = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        legend.position = "top",
        legend.title = element_text(size = 16,color = "#222222"), 
        legend.text = element_text(size = 13,color = "#222222"), 
        plot.title.position = "plot",
        plot.title = element_text(size = 18, face = "bold", color = "#222222"))

Histogram

  • Simple histogram
# Dataviz
iris |> 
    ggplot() +
      geom_histogram(aes(x = sepal_length),
                     bins = 7L, col = "white", fill = "#2B73B4", width = 5) +
      geom_vline(xintercept = mean(iris$sepal_length, na.rm = TRUE), linetype = 2, col = "red") +
      geom_text(aes(x = mean(sepal_length, na.rm = T) + .2, y = 38, 
                    label = paste("Moyenne :", round(mean(sepal_length, na.rm = T), 1), "cm")), 
                col = "red", fontface = "italic", hjust = 0, size = 5) +
      labs(x = "Valeur",y = "Fréquence", title = "Distribution de la longeur des pétales des iris",
           subtitle = paste(iris |> filter(is.na(sepal_length)) |> nrow(), "valeur manquante")) +
      theme_custom() +
      theme(plot.subtitle = element_text(face = "italic"),
            plot.title = element_text(face = "bold")) +
      scale_x_continuous(n.breaks = 10)

  • Histogram on multiple variables
# Dataviz
iris |> 
  reshape2::melt(id.vars = c("species")) |> 
  ggplot() +
      geom_histogram(aes(x = value),
                     bins = 10L, fill = "#2B73B4", binwidth = .2) +
      labs(x = "Valeur", y = "Fréquence", 
           title = "Distribution des différents éléments des iris") +
      theme_custom() +
      facet_wrap(variable ~ .)

  • Histogram with normal distribution
# Dataviz
iris |> 
  ggplot() +
    geom_histogram(aes(x = sepal_length, y=..density..),
                   bins = 7L, color="#e9ecef", fill = "#2B73B4") +
    stat_function(fun = dnorm, args = list(mean = mean(iris$sepal_length), sd = sd(iris$sepal_length)), 
              size = 1, alpha = .8, aes(col = "Distribution normale")) +
    labs(x = "Valeur", y = "Densité", col = "",
         title = "Distribution de la longeur des pétales des iris") +
    theme_custom() +
    scale_x_continuous(n.breaks = 10)

Barplot

  • Highlight one category
# Data pour le graphique
data_graph <- data.frame(
  stringsAsFactors = FALSE,
         Categorie = c("AA", "BB", "CC", "DD", "EE"),
            Valeur = c(17, 43, 5, 14, 29))

# Graph
data_graph |> 
    mutate(percent = round((Valeur / sum(Valeur))*100, 0),
           Categorie = fct_reorder(Categorie, Valeur)) |> 
    ggplot()+
        geom_bar(aes(x = Categorie, y = Valeur, alpha = Categorie != "EE"), 
                 stat = "identity", width = .6, fill = "#2B73B4") +
        geom_text(aes(y = Valeur+.05*max(Valeur), x = Categorie, label = paste(percent,"%",sep = "")), 
                  color = "#333333", check_overlap = T) +
        scale_alpha_manual(values = c(.9, .4)) +
        coord_flip() +
        labs(y = "Fréquence", title = "Répartition des catégories selon la valeur") +
        theme_custom() +
        theme(legend.position = "none",
              axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
              panel.grid.major.x = ggplot2::element_line(color = "#cbcbcb"),
              panel.grid.major.y = ggplot2::element_blank())

  • Barplot ordering bars with geom_bar()
iris |> 
    filter(sepal_width >= 3) |> 
    ggplot() +
      aes(x = reorder(species, species,
                         function(x)+length(x))) + #+ pour descendant, - pour ascendant
      geom_bar(fill = "#3182BD", alpha = .9) +
      coord_flip() +
      theme_custom() +
      theme(panel.grid.major.x = ggplot2::element_line(color = "#cbcbcb"),
            panel.grid.major.y = ggplot2::element_blank(),
            axis.title.y = element_blank())

  • Barplot en facet_grid
# Données pour le graph
table_graph_global <- data.frame(
   stringsAsFactors = FALSE,
             Acteur = c("A","A","A","A","B","B",
                        "B","B","C","C","C","C","D","D","D","D","E",
                        "E","E","E","F","F","F","F","G","G","G"),
         Importance = c("Forte","Moyenne",
                        "Indispensable","Faible","Indispensable","Forte","Moyenne",
                        "Faible","Indispensable","Forte","Moyenne","Faible",
                        "Forte","Moyenne","Indispensable","Faible","Moyenne",
                        "Forte","Indispensable","Faible","Forte",
                        "Indispensable","Moyenne","Faible","Forte","Moyenne",
                        "Faible"),
         nb_actions = c(6,6,5,3,4,7,3,
                        3,2,5,3,3,6,7,2,2,3,3,1,2,4,
                        2,2,3,5,3,2),
  nb_actions_acteur = c(20,20,20,20,17,17,
                        17,17,13,13,13,13,17,17,17,17,9,
                        9,9,9,11,11,11,11,11,11,11)
)
# Viz globale
table_graph_global |> 
  #ajout des infos quand aucune action pour telle importance pour un acteur en particulier
  add_row(Acteur = "G", Importance = "Indispensable", nb_actions = 0, nb_actions_acteur =11) |> 
  #tri des valeurs
  mutate(Acteur = fct_reorder(Acteur, nb_actions_acteur),
         Importance = factor(Importance, levels = c("Faible", "Moyenne", "Forte", "Indispensable"))) |> 
  #graph
  ggplot() +
  geom_col(aes(x = Acteur, y = 7), fill = "#F3F3F3", width = .85) +
  geom_col(aes(x = Acteur, y = nb_actions, fill = Importance), width = .85) +
  geom_text(aes(x = Acteur, y = nb_actions-.4, col = Importance,
                label = ifelse(nb_actions != 0, nb_actions, ""))) +
  geom_text(aes(x = Acteur, y = 6.5, 
                label = ifelse(Importance == "Indispensable", nb_actions_acteur, "")), col = "black") +
  labs(y = "Nombre d'actions", title = "Nombre d'actions à mener par chaque acteur selon leur importance") +
  scale_fill_manual(values = c("Faible" = "#2B73B4", "Moyenne" = "#fecf5d", 
                                   "Forte" = "#ed8b00", "Indispensable" = "#dd4124")) +
  scale_color_manual(values = c("Faible" = "white", "Moyenne" = "black", 
                                   "Forte" = "white", "Indispensable" = "white")) +
  coord_flip() +
  facet_grid(~Importance) +
  theme_minimal() +
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text.x = element_blank(),
        panel.grid = element_blank(),
        title = element_text(face = "bold", size = 15),
        strip.text = element_text(size = 12, hjust = 0.085),
        plot.title.position = "plot")

  • Grouped barplot
library(ggtext)

# Data pour le graphique
data_graph <- data.frame(
  stringsAsFactors = FALSE,
         Categorie = c("AA", "BB", "CC", "DD", "AA", "BB", "CC", "DD"),
            Percent = c(0.47, 0.25, 0.13, 0.15, 0.42, 0.28, 0.11, 0.19),
             Annee = c("2020", "2020", "2020", "2020", "2025", "2025", "2025", "2025"),
            Ecart = c("-5", "+3", "-2", "+4"))

# Dataviz
data_graph |> 
  mutate(max_percent = max(Percent), .by = Categorie) |> 
  ggplot(aes(x = Categorie, y = Percent, fill = Annee)) +
  geom_bar(position="dodge", stat="identity", width=.6, alpha = .9) +
  coord_flip() +
  labs(x = "", y = "Pourcentage", 
       title = stringr::str_wrap("Évolution des réponses entre <span style='color: #fecf5d;'>2020</span> et <span style='color: #2B73B4;'>2025</span>", width = 55)) +
  scale_x_discrete(labels = function(x) stringr::str_wrap(x, width = 40)) + 
  scale_y_continuous(labels = scales::percent) + # pourcentages
  scale_fill_manual(values = c("2020" = "#fecf5d", "2025" = "#2B73B4")) +
  theme_custom() +
  theme(legend.position = "none",
        panel.grid.major.x = ggplot2::element_line(color = "#cbcbcb"),
        panel.grid.major.y = ggplot2::element_blank(),
        plot.title = element_markdown()) +
  geom_label(aes(x = Categorie, y = max_percent, label = paste0(Ecart, "%")), 
             position="dodge", color = "#333333", hjust = 0, 
             fill = "white", label.size = NA)

Pyramid

library(lemon)

# Data pour le graphique
data_graph <- data.frame(
  stringsAsFactors = FALSE,
               Age = c("[0;15[","[15;40[","[40;65[","[65;100[",
                       "[0;15[","[15;40[","[40;65[","[65;100["),
              Sexe = c("Homme","Homme","Homme","Homme",
                       "Femme","Femme","Femme","Femme"),
           Nb_pers = c(5, 17, 32, 12, 7, 22, 25, 18))
  
# Dataviz
data_graph |> 
  mutate(percent = Nb_pers / sum(Nb_pers) *100,
         percent = ifelse(percent < 0.5, round(percent, 1), round(percent, 0)), 
         .by = Sexe) |> 
  ggplot(mapping = aes(x = ifelse(Sexe ==  "Homme", -Nb_pers, Nb_pers), y = Age, fill = Sexe)) +
  geom_col(size = 1.3) + 
  geom_label(aes(y = Age, 
                x = ifelse(Sexe ==  "Homme", -Nb_pers, Nb_pers), 
                label = paste(percent,"%",sep = ""), 
                hjust = ifelse(Sexe ==  "Homme", 1, 0)), 
            color = "#333333", check_overlap = T, fill = "white", label.size = NA) +
  #valeur sur l'axe en valeurs absolues
  scale_x_symmetric(labels = abs, 
                    limits = c(0, max(data_graph$Nb_pers)+0.1*max(data_graph$Nb_pers))) +
  scale_colour_manual(values = c('#fecf5d', '#2B73B4'),
                      aesthetics = c("colour", "fill")) +
  labs(x = "Fréquence", y = "", title = "Pyramide des âges de la population interrogée") +
  theme_custom() + 
  theme(legend.position = "top",
        panel.grid.major.x = ggplot2::element_line(color = "#cbcbcb"),
        panel.grid.major.y = ggplot2::element_blank()) +
  guides(fill = guide_legend(title = "", reverse = TRUE))

Radar

#devtools::install_github("ricardo-bion/ggradar")  
library(ggradar)

# Dataviz
iris |> 
  select(sepal_length, sepal_length:petal_width) |> 
  summarise_all(funs(median(., na.rm = T))) |> 
  ggradar(values.radar = c("1", "3", "5"),
          grid.min = 1, grid.mid = 3, grid.max = 5,
          # Polygones
          group.line.width = 1, 
          group.point.size = 3,
          group.colours = "#2B73B4",
          # Arrière-plan et lignes de grille
          background.circle.colour = "white",
          gridline.mid.colour = "grey") +
  xlim(-10,10) + #selon longueur catégories
  labs(title = "Mesure moyenne des iris") +
  theme(legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(face = "bold"))

Infographics

  • Infographie simple
library(icons) ## remotes::install_github("mitchelloharawild/icons")
library(tidyverse)
# Table
df <- data.frame(
    x = c(2, 8.5, 15, 21.5),
    y = rep(6.5, 4),
    h = rep(4.25, 4),
    w = rep(6.25, 4),
    value = c(5, 7, 17, 5),
    info = c("Communes",
             "Participants",
             "JDD",
             "Réutilisations"),
    color = factor(1:4)
)

# Graphique
ggplot(df, aes(x, y, height = h, width = w, label = info)) +
    ## Create the tiles using the `color` column
    geom_tile(aes(fill = color)) +
    ## Add the numeric values as text in `value` column
    geom_text(color = "white", fontface = "bold", size = 10,
              aes(label = ifelse(value > 999, format(as.integer(value, 0), nsmall = 1, big.mark = "."), value), 
                  x = x, y = y+.5), 
              hjust = 1) +
    ## Add the labels for each box stored in the `info` column
    geom_text(color = "white", fontface = "bold", size = 5,
              aes(label = info, x = x - 2.9, y = y - 1), hjust = 0) +
    coord_fixed(expand = F) +
    #scale_fill_manual(type = "qual", palette = "Dark2") +
    scale_fill_manual(values = c("#9bcea4", "#ef7875", "#ffcc00", "#23ae84", "#fecf5d", "#2B73B4")) +
    ## Use `geom_text()` to add the icons by specifying the unicode symbol.
    theme_void() +
    guides(fill = FALSE)

  • Infographie mise en page
# Données
df <- data.frame(
    x = c(2, 2, 2),
    y = c(2, 6.5, 11), 
    h = rep(4.25, 3),
    w = rep(18, 3),
    value = c("46%", "3", "77%"),
    info = c("des participants à l'enquête blablala ", "communes sur 10 considèrent que blablala", "des commerçants ont à coeur de blablabla"),
    icon = c(emojifont::fontawesome("fa-handshake-o"), emojifont::fontawesome("fa-comment-o"), emojifont::fontawesome("fa-comments-o")),
    font_family = c(rep("FontAwesome", 3)),
    color = factor(1:3))

# Graph
ggplot(df, aes(x, y, height = h, width = w, label = info)) +
    ## Create the tiles using the `color` column
    geom_tile(aes(fill = color)) +
    ## Add the numeric values as text in `value` column
    geom_text(color = c("1" = "white", "2" = "white", "3" = "white"), family = "Din", fontface = "bold", size = 18,
              aes(label = value, x = x - 4.1, y = y + 1), hjust = 0) +
    ## Add the labels for each box stored in the `info` column
    geom_text(color = c("1" = "white", "2" = "white", "3" = "white"), family = "Helvetica", fontface = "bold", size = 4,
              aes(label = str_wrap(info, width = 50), x = x - 4.1, y = y - .8), hjust = 0, lineheight = 0.5) +
    coord_fixed() +
    scale_fill_brewer(type = "qual",palette = "Dark2") +
    ## Add the icons by specifying the unicode symbol.
    geom_text(color = c("1" = "#087370", "2" = "#FFB4A6", "3" = "#9AB0B0"),
              size = 23, aes(label = icon, family = font_family,
                             x = -4.5, y = y + 0.15), alpha = 0.9) +
    # Couleurs
    scale_fill_manual(values = c("1" = "#9AB0B0", "2" = "#EC6459", "3" = "#087370")) +
    # Titre et thème
    labs(title = "    Dans l'échantillon des répondants :") +
    theme_void() +
    theme(plot.title = element_text(size = 18, face = "bold", color = "#222222")) +
    guides(fill = FALSE)

Correlation matrix

  • With corrplot package
library(corrplot)

# Data pour le graphique
matrix <- cor(iris |> select(-species))

# Dataviz
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
corrplot(matrix, method="color", col=col(200),  
         type="upper", order="hclust", 
         addCoef.col = "black", # Ajout du coefficient de corrélation 
         tl.srt = 45, tl.col = "black", tl.cex = .8, #Rotation des etiquettes de textes
         diag = TRUE, mar=c(0,0,5,0), 
         title = "Correlation négative entre sepal_width et les autres mesures")

  • With ggplot
library(ggcorrplot)
library(ggtext)

# Dataviz
    # couleurs
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
bgcolors <- matrix("black", nrow(matrix), ncol(matrix),dimnames = dimnames(matrix))
bgcolors[,1] <- "red"
bgcolors <- bgcolors[lower.tri(bgcolors, diag=TRUE)]
    # matrice
ggcorrplot(matrix, hc.order = T, type = "lower", show.diag = TRUE, legend.title = "",
           lab = TRUE, lab_col = bgcolors, colors = c("#BB4444", "white", "#4477AA")) +
    labs(title = "Correlation négative entre <span style='color: red;'>sepal_width</span> <br>et les autres mesures") +
    geom_label(aes(x = 3, y = 4), label = "petal_width", vjust = .4, hjust = "right", 
               fill = "white", label.size = NA) +
    geom_label(aes(x = 2, y = 3), label = "petal_length", vjust = .4, hjust = "right", 
               fill = "white", label.size = NA) +
    geom_label(aes(x = 1, y = 2), label = "sepal_length", vjust = .4, hjust = "right", 
               fill = "white", label.size = NA) +
    geom_label(aes(x = 0, y = 1), label = "sepal_width", vjust = .4, hjust = "right", 
               fill = "white", label.size = NA) +
    coord_fixed(clip = 'off') +
    theme(axis.text.y = element_blank(),
          panel.grid = element_blank(),
          plot.title.position = "plot",
          plot.title = element_markdown(size = 18, lineheight = .2))

Carto

  • General cartography leaflet (points)
library(leaflet)
library(htmltools)

# Data pour le graphique
data_graph <- data.frame(
  stringsAsFactors = FALSE,
            Ville = c("Nantes","Paris","Bordeaux","Lyon","Marseille"),
         Nb_users = c(100L,500L,300L,400L,500L),
        Longitude = c(-1.5528,2.333333,-0.580816,4.85,5.37),
         Latitude = c(47.218102,48.866667,44.836151,45.75,43.296398))

  # titre
tag.map.title <- tags$style(HTML("
  .leaflet-control.map-title { 
    transform: translate(0%,-170%);
    left: 7%;
    text-align: center;
    padding-left: 10px; 
    padding-right: 10px; 
    font-weight: bold;
    font-size: 22px;
    color: black;
  }
"))
title <- tags$div(tag.map.title, HTML("Localisation des utilisateurs"))

# Dataviz
data_graph |> 
  leaflet() |>
  addTiles() |> 
  addControl(title, position = "topleft", className = "map-title") |> 
  setView(lng = 3, lat = 47, zoom = 4.8) |> 
  addCircles(radius = data_graph$Nb_users, lng = data_graph$Longitude, lat = data_graph$Latitude, #color = data_graph$col, 
             weight = 1, opacity = data_graph$Nb_users, fillOpacity = .1, 
             label = data_graph$Ville,
             popup = paste(data_graph$Ville, ":", data_graph$Nb_users, "utilisateurs")) |> 
  addProviderTiles(provider = "Esri.WorldGrayCanvas")
# all providers : http://leaflet-extras.github.io/leaflet-providers/preview/index.html
  • Points qui se précisent avec le zoom
# Dataviz
data_graph |> 
  leaflet() |>
  addTiles() |> 
  setView(lng = 3, lat = 47, zoom = 4.8) |> 
  addMarkers(lng = data_graph$Longitude, lat = data_graph$Latitude, 
             label = data_graph$Ville,
             clusterOptions = markerClusterOptions()) |> #clusterOptions fait cet effet
  addProviderTiles(provider = "Esri.WorldGrayCanvas")
  • Cartography mapview (choropleth)
# Data pour le graphique
library(geojsonR)
library(httr)
library(sf)
library(mapview)
library(leafpop)
temp_file <- tempfile(fileext = ".geojson")
    #données ODS : https://public.opendatasoft.com/explore/dataset/georef-france-commune/table/?disjunctive.reg_name&disjunctive.dep_name&disjunctive.arrdep_name&disjunctive.ze2020_name&disjunctive.epci_name&disjunctive.ept_name&disjunctive.com_name&disjunctive.ze2010_name&disjunctive.com_is_mountain_area&disjunctive.bv2022_name&sort=year
GET("https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/georef-france-commune/exports/geojson?lang=fr&refine=reg_name%3A%22Corse%22&facet=facet(name%3D%22reg_name%22%2C%20disjunctive%3Dtrue)&timezone=Europe%2FBerlin", write_disk(temp_file, overwrite = TRUE))
## Response [https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/georef-france-commune/exports/geojson?lang=fr&refine=reg_name%3A%22Corse%22&facet=facet(name%3D%22reg_name%22%2C%20disjunctive%3Dtrue)&timezone=Europe%2FBerlin]
##   Date: 2025-09-22 15:22
##   Status: 200
##   Content-Type: application/json; charset=utf-8
##   Size: 4.23 MB
## <ON DISK>  /tmp/RtmpLpk4oC/file24463910a368.geojson
communes_contours_geo <- st_read(temp_file, quiet = TRUE)

# Dataviz
communes_contours_geo |> 
    select(dep_name, geometry) |> 
    mutate(dep_name = as.character(dep_name)) |> # ÉTAPE IMPORTANTE SINON "ERROR NON-NUMERIC ARG"
    na.omit() |> 
    st_as_sf() |> 
    mapview(zcol = "dep_name",
            layer.name = "Communes de Corse",
            legend = TRUE, 
            basemaps.color.shuffle = FALSE, map.types = "CartoDB.Positron",
            col.regions = c("Corse-du-Sud" = "#b5dbfb", "Haute-Corse" = "#1d82df"),
            popup = popupTable(communes_contours_geo, zcol = c("dep_name")))
  • Combinaison points and choropleth carto
# Data pour le graphique
data_graph <- data.frame(
  stringsAsFactors = FALSE,
            Ville = c("Ajaccio","Bastia"),
         Nb_users = c(100L,500L),
        Longitude = c(8.736900, 9.450881),
         Latitude = c(41.926701, 42.697285))

# Data pour le graphique
ma_carte <- communes_contours_geo |> 
    select(dep_name, geometry) |> 
    mutate(dep_name = as.character(dep_name)) |> # ÉTAPE IMPORTANTE SINON "ERROR NON-NUMERIC ARG"
    na.omit() |> 
    st_as_sf() |> 
    mapview(zcol = "dep_name",
            layer.name = "Communes de Corse",
            legend = TRUE, 
            basemaps.color.shuffle = FALSE, map.types = "CartoDB.Positron",
            col.regions = c("Corse-du-Sud" = "#b5dbfb", "Haute-Corse" = "#1d82df"),
            popup = popupTable(communes_contours_geo, zcol = c("dep_name"))) +
    mapview(
          data_graph,
          xcol = "Longitude", 
          ycol = "Latitude",
          crs = 4326,                 # coordonnées en WGS84
          grid = FALSE,               # pas de grille
          popup = "Nb_users",           # info affichée au clique
          label = "Ville",            # info affichée au survol
          cex = 4,                    # taille des points
          col.regions = "red",        # couleur des points
          alpha = 0.8,                # transparence
          cluster = TRUE,             # équivalent de clusterOptions()
          basemaps = "Esri.WorldGrayCanvas",
          legend = FALSE)

# Conversion en objet leaflet pour ajouter un titre
leaflet_map <- ma_carte@map %>% 
  addControl(
    html = "<p style='text-align:center; color: darkblue;'>Répartition géographique des répondants</p>",
    position = "topright"
  )
leaflet_map

Lineplot

  • Dual axis on ggplot
# Data pour le graphique
data_graph <- data.frame(temps = c(2015, 2016, 2017, 2018, 2019),
                         n1 = c(123, 736, 927, 827, 329),
                         n2 = c(1120, 2459, 3000, 4903, 6763))
# Dataviz

data_graph |> 
  ggplot(aes(x = temps)) +
  geom_line( aes(y=n1), size=1, alpha=0.9, color = "#3366CC") +
  geom_line( aes(y=n2/(max(n2)/max(n1))), size=1, alpha=0.9, color = "#CC0000") +
  labs(x = "Temps", 
       title = stringr::str_wrap("Évolution de la population et du budget par habitant", width = 50)) +
  scale_y_continuous(name = "Population",
                     sec.axis = sec_axis(~ . * (max(data_graph$n2)/max(data_graph$n1)), 
                                         name = "Budget par habitant")) +  #scale_x_date(date_labels = "%Y %b") +
  theme_classic() +
  theme_custom() +
  theme(legend.position = "right",
        axis.title.y = element_text(color = "#3366CC"),
        axis.title.y.right = element_text(color = "#CC0000"))

Treemap

  • Simple, green color
library(treemap)
library(treemapify)

# Data pour le graphique
data_graph <- data.frame(
         stringsAsFactors = FALSE,
               Importance = c("Forte","Moyenne","Indispensable","Faible","Très forte", "Très faible", "Inexistant"),
               Valeur = c(1,15,6,7,1,9,2))

# Dataviz
data_graph |> 
  ggplot() +
      geom_treemap(aes(area = Valeur, fill = Importance), col = "white", size = 4) +
      geom_treemap_text(aes(area = Valeur, fill = Importance, 
                            label = paste0(Importance, "\n(", Valeur, " actions)")),
                        colour = "white", place = "centre", size = 15, grow = TRUE) +
      scale_fill_manual(values = c("#345E68", "#FEDEA0", "#B7C2A5", "#023743","#7A9BB1", "#B8AA75", "#7B8598", "#345B48", "#476F84", "#D0BA7C")) +
      labs(title = "Nombre d'actions selon leur importance") +
      theme_custom() +
      theme(legend.position = "none")

  • Faceted treemap
library(treemapify)

# Data pour le graphique
data_graph <- data.frame(
         stringsAsFactors = FALSE,
  `Aspect intéropérabilité` = c("Sémantique",
                              "Sémantique","Technique","Sémantique","Sémantique",
                              "Sémantique","Sémantique","Levier humain",
                              "Technique","Technique","Technique",
                              "Levier humain","Levier humain","Levier humain","Sémantique",
                              "Sémantique","Technique","Technique",
                              "Sémantique","Sémantique"),
               Importance = c("Forte","Forte",
                              "Moyenne","Indispensable","Indispensable",
                              "Moyenne","Moyenne","Moyenne","Faible","Moyenne",
                              "Faible","Indispensable","Indispensable",
                              "Forte","Forte","Moyenne","Indispensable","Forte",
                              "Faible","Forte"),
               Num_action = c(1,2,3,4,
                              5,6,7,8,9,10,11,12,13,14,15,
                              16,17,18,19,20)) |> 
  rename(`Aspect intéropérabilité` = Aspect.intéropérabilité)

# Graph
data_graph |> 
  mutate(nb_actions = n(),
         action_agregee = ifelse(n() == 1, 
                                 paste0("1 action\n(", Num_action, ")"), 
                                 paste0(n(), " actions\n(", paste0(Num_action, collapse = ", "), ")")),
         .by = c(`Aspect intéropérabilité`, Importance)) |> 
  distinct(`Aspect intéropérabilité`, Importance, nb_actions, action_agregee) |> 
  mutate(Importance = factor(Importance, levels = c("Faible", "Moyenne", "Forte", "Indispensable"))) |> 
  ggplot(aes(area = nb_actions, fill = Importance, subgroup = `Aspect intéropérabilité`)) +
      geom_treemap(col = "white", size = 4, alpha = .6) +
      geom_treemap_text(aes(label = action_agregee), 
                        place = "centre", grow=F) +
      geom_treemap_subgroup_text(place = "bottom", grow = TRUE,
                             alpha = 0.25, colour = "black",
                             fontface = "italic") +
      #geom_treemap_subgroup_border(colour = "white", size = 13) +
      scale_fill_manual(values = c("Faible" = "#0f85a0", "Moyenne" = "#ffdb52", 
                                   "Forte" = "#ed8b00", "Indispensable" = "#dd4124")) +
      labs(title = "Actions à mener selon leur degré d'importance") +
      facet_grid(~`Aspect intéropérabilité`) +
      theme(legend.position = "top",
            legend.title = ggplot2::element_text(size = 16, color = "#222222"), 
            legend.text = ggplot2::element_text(size = 15,color = "#222222"), 
            strip.text = element_blank(),
            title = element_text(face = "bold", size = 18))

Lolipop chart

  • Min and max values
# Data pour le graphique
data_graph <- data.frame("variable" = c("sepal_length", "sepal_width", "petal_length", "petal_width"),
           "Minimum" = c(min(iris$sepal_length), min(iris$sepal_width), 
                         min(iris$petal_length), min(iris$petal_width)),
           "Maximum" = c(max(iris$sepal_length), max(iris$sepal_width), 
                         max(iris$petal_length), max(iris$petal_width)),
           "Moyenne" = c(mean(iris$sepal_length), mean(iris$sepal_width), 
                         mean(iris$petal_length), mean(iris$petal_width)))

# Dataviz
data_graph |> 
  arrange(variable) |> 
  mutate(variable = factor(variable, levels = rev(unique(variable)))) |> 
  ggplot() +
  geom_segment(aes(x = Minimum, xend = Maximum, y = variable, yend = variable), col = "grey50") +
  geom_segment(aes(x = Moyenne, xend = Moyenne+.02, y = variable, yend = variable), 
               colour = "black", lwd = 3) +
  geom_point( aes(x = Minimum, y = variable), color = "#2B73B4", size=3, alpha = .8) +
  geom_point( aes(x = Maximum, y = variable), color = "#dd4124", size=3, alpha = .8) +
  geom_text(aes(x = Moyenne+.2, y = variable, label = round(Moyenne, 1), 
                hjust = "bottom", vjust = "bottom"), col = "#333333") +
  labs(title = "Longeur minimales, maximales et moyennes des \niris", x = "Longeur en cm", y = "") +
  scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 50)) +
  scale_color_manual(values = c("petal_length" = "#2B73B4", 
                                "sepal_width" = "#fecf5d", 
                                "sepal_length" = "#dd4124", 
                                "petal_width" = "#ed8b00")) +
  theme_custom() +
  theme(legend.position = "none", 
        plot.title = element_text(face = "bold"),
        panel.grid.major.x = ggplot2::element_line(color = "#cbcbcb"),
        panel.grid.major.y = ggplot2::element_blank())

  • Highlight value
library(ggtext)

# Dataviz
data_graph |> 
  ggplot() +
  geom_segment(aes(x = 0, xend = Maximum, y = variable, yend = variable), 
               color = ifelse(data_graph$variable == "sepal_length", "#dd4124", "#fecf5d"),
               size = ifelse(data_graph$variable == "sepal_length", 2, 1)) +
  geom_point(aes(x = Maximum, y = variable), 
             color = ifelse(data_graph$variable == "sepal_length", "#dd4124", "#fecf5d"),
             size = ifelse(data_graph$variable == "sepal_length", 3, 2)) +
  labs(title = "Le maximum des <span style='color: #dd4124'>longeurs de sépales</span> est <br>plus élevé que les <span style='color: #fecf5d'>autres mesures</span>", #str_wrap() ne marche plus avec le element_markdown()
         y = "", x = "Longeur en cm") +
  theme_custom() +
  theme(legend.position = "none", 
        plot.title = element_markdown(),
        panel.grid.major.x = ggplot2::element_line(color = "#cbcbcb"),
        panel.grid.major.y = ggplot2::element_blank()) 

  # ggplot2::annotate("text",
  #            x = grep("Associations de quartier", data_graph$variable),
  #            y = data_graph$Maximum[which(data_graph$variable == "sepal_length")]*1.1,
  #            label = paste0(round(percent_collab_quartier$percent, 0), "% des collaborations \nse font avec des \nassociations de quartier"),
  #            color = "#3182BD", size=4 , angle=0, fontface="bold", hjust=0)


Tables

gttable()

library(gt)
library(gtExtras)
data |> 
    # Transformation de la colonne externalité pour mettre le picto
    mutate(` ` = case_match(Externalité,
                                    "Lien social" ~ "images/lien_social.png",
                                    "Solidarités" ~ "images/solidarites.png",
                                    "Vie de quartier" ~ "images/vie_quartier.png",
                                    "Santé et sécurité" ~ "images/sante_securite.png",
                                    "Environnement" ~ "images/environnement.png",
                                    "Espace public" ~ "images/espace_public.png")) |> 
    relocate(` `) |> 
    # Ordre des lignes et groupes
    mutate(Externalité = factor(Externalité, 
                                levels = c("Lien social","Solidarités","Vie de quartier","Santé et sécurité","Environnement","Espace public"))) |> 
    mutate(ordre = ifelse(Indicateur == "% de commerces combinant les 3 externalités", 2, 1)) |> 
    arrange(Externalité, ordre) |>  # Tri par externalité, puis indicateur
    select(-ordre) |> 
    # GT TABLE
    gt(groupname_col = "Externalité") |> 
    # Titres
    tab_header(title = md("**Comparaison territoriales des externalités positives du commerce**")) |>
    tab_source_note(source_note = md("*Données d'une enquête diffusée d'avril à novembre 2024 auprès de **324 commerces** de France entière.*")) |> 
    # Rassemblement des chiffres par territoires
    tab_spanner(label = md("**Paris**"), columns = c(`Nombre de réponses`, `%`)) |> 
    tab_spanner(label = md("**St Ouen**"), columns = c(` Nombre de réponses`, ` %`)) |> 
    tab_spanner(label = md("**Rouen**"), columns = c(`Nombre de réponses `, `% `)) |> 
    # Stype de la table
    tab_style(style = list(cell_text(weight = "lighter")), 
              locations = cells_body(columns = Indicateur)) |>  
    # Couleur des indicateurs récapitulatifs
    tab_style(style = list(cell_fill(color = "lightgrey", alpha = 1)),
              locations = cells_body(columns = everything(), 
                                     rows = Indicateur == "% de commerces combinant les 3 externalités")) |> 
    # Intégration des pictos externalités
    text_transform(locations = cells_body(columns = " "),
                   fn = function(x) {
                      local_image(
                        filename = x,
                        height = 27)
                    }) |> 
    cols_width(` ` ~ "5%") |> 
    # % en barres
    gt_plt_bar_pct(`%`, scaled = TRUE, labels=TRUE, decimals = 0, 
                   font_size = "14px", fill = "#343333", height = 20) |> 
    gt_plt_bar_pct(` %`, scaled = TRUE, labels=TRUE, decimals = 0, 
                   font_size = "14px", fill = "#343333", height = 20) |> 
    gt_plt_bar_pct(`% `, scaled = TRUE, labels=TRUE, decimals = 0, 
                   font_size = "14px", fill = "#343333", height = 20) |> 
    # couleur des noms de groupes (familles d'externalités)
    tab_style(style = list(cell_fill(color = "#B4B1B1", alpha = .4),
                           cell_text(weight = "bold")),
              locations = cells_row_groups(groups = "Lien social")) |> 
    tab_style(style = list(cell_fill(color = "#004654", alpha = .4),
                           cell_text(weight = "bold")),
              locations = cells_row_groups(groups = "Solidarités")) |> 
    tab_style(style = list(cell_fill(color = "#0097B2", alpha = .4),
                           cell_text(weight = "bold")),
              locations = cells_row_groups(groups = "Vie de quartier")) |> 
    tab_style(style = list(cell_fill(color = "#00A589", alpha = .4),
                           cell_text(weight = "bold")),
              locations = cells_row_groups(groups = "Santé et sécurité")) |>
    tab_style(style = list(cell_fill(color = "#E1B441", alpha = .4),
                           cell_text(weight = "bold")),
              locations = cells_row_groups(groups = "Environnement")) |>
    tab_style(style = list(cell_fill(color = "#FF5757", alpha = .4),
                           cell_text(weight = "bold")),
              locations = cells_row_groups(groups = "Espace public")) |> 
    tab_style(style = list(cell_borders(sides = c("t", "b"), color = "white", weight = px(2))),
              locations = cells_row_groups()) |> 
    # centrage des colonnes
    cols_align(align = "center", columns = c(`Nombre de réponses`, `%`, ` Nombre de réponses`, ` %`, `Nombre de réponses `, `% `)) |> 
    # retirer la ligne horizontale au-dessus du titre
    tab_options(table.border.top.style = "none", 
                table.border.top.width = px(0)) |>    
    # table dans une boxe avec barre de défilement verticale
    tab_options(table.width = "100%",         # Largeur de la table
                container.overflow.x = "auto", # Scroll horizontal si nécessaire
                container.overflow.y = "auto", # Scroll vertical si nécessaire
                container.height = px(600)) |>     # Hauteur fixe avec défilement vertical
    # fixe les noms de colonnes et titre 
    opt_css(css = "
      /* Fixer le titre */
      .gt_title, .gt_subtitle {
        position: sticky;
        top: 0; 
        background-color: #f9f9f9; /* Couleur de fond pour le titre */
        z-index: 2;               /* Met le titre au-dessus */
        padding: 5px;             /* Ajoute un peu de marge interne */
      }
      
      /* Fixer les noms des colonnes */
      thead th {
        position: sticky;
        top: 30px;                /* Ajuste en fonction de la hauteur du titre */
        background-color: #ffffff; /* Couleur de fond pour l'en-tête */
        z-index: 1;               /* Met les noms de colonnes devant les lignes */
      }")


DT::datatable()

datatable(data, options = list(pageLength = 5, scrollX = TRUE))


knitr::kable()

knitr::kable(stat_indiv, format = "html") |> 
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))


reactable()

  • Colorize cells based on categorical column
library(reactable)

# Fonction pour colorer les catégories
category_styles <- function(Catégorie) {
  case_when(
    Catégorie == "Faible" ~ list(background = "#e3f2fd", color = "black"),
    Catégorie == "Moyen" ~ list(background = "#b5dbfb", color = "black"),
    Catégorie == "Élevé" ~ list(background = "#1d82df", color = "white"),
    Catégorie == "Très élevé" ~ list(background = "#0d47a1", color = "white"),
    Catégorie == "NA" ~ list(background = "#CCCCCC", color = "black"),
    TRUE ~ list(background = NA, color = NA)
  )
}

# Affichage de la table
table |> 
    reactable(columns = list(Catégorie = colDef(style = function(value){category_styles(value)})))
  • Colorize all columns except one based on numerical column
# Indicateurs en une table
table <- data.frame(
  Nom = c("nom1", "nom3", "nom3"),
  Score = c(85, 92, 78),
  Age = c(25, 30, 22),
  Height = c(160, 175, 168)
)

# Fonction pour colorer les indicateurs
indicateur_styles <- function(Indicateur) {
    # calcul des statistiques
  q1 <- quantile(Indicateur, 0.25, na.rm = TRUE)
  med <- median(Indicateur, na.rm = TRUE)
  q3 <- quantile(Indicateur, 0.75, na.rm = TRUE)
    # conditions de mise en forme
  sapply(Indicateur, function(value) {
    dplyr::case_when(
      value <= q1 ~ "background-color: #e3f2fd; color: black;",
      value > q1 & value <= med ~ "background-color: #b5dbfb; color: black;",
      value > med & value <= q3 ~ "background-color: #1d82df; color: white;",
      value > q3 ~ "background-color: #0d47a1; color: white;",
      TRUE ~ "background-color: #CCCCCC; color: black;"
    )
  })
}

# Liste des colonnes sur lesquelles appliquer le style
column_defs <- setNames(
  lapply(names(table), function(col) {
    colDef(
      style = if (col != "Nom") {
        function(value, index, name) {
          indicateur_styles(table[[col]])[index]
        }
      } else {
        NULL
      }
    )
  }),
  names(table)
)

# Affichage de la table
table |> 
    reactable(columns = column_defs, defaultPageSize = 5)
  • Freeze first column
# Fixer la première colonne
data |> reactable(columns = list(ID = colDef(sticky = "left")))

# Fonction pour fixer la première colonne en plus d'appliquer aux autres colonnes la mise en forme des couleurs
column_defs <- setNames(
  lapply(names(table), function(col) {
    if (col != "ID") {
      colDef(
        style = function(value, index, name) {
          indicateur_styles(table[[col]])[index]
        }
      )
    } else {
      colDef(
        sticky = "left"
      )
    }
  }),
  names(table)
)
  • Export reactable as HTML and JPG
# Export de la table
library(htmlwidgets)
library(webshot2)
saveWidget(widget = table_rea, file = "../mon_path/data.html", selfcontained = TRUE)
saveWidget(widget = ma_carte@map, file = "../mon_path/carto.html", selfcontained = TRUE) #si carte mapview
webshot2::webshot("../mon_path/data.html", "../mon_path/data.jpg", 
                  vwidth = 1200, vheight = 600)


Fin d’analyse


Export

  • Export CSV
rio::export(data, "~/Downloads/tableau.csv")
write.csv(data, "~/Downloads/tableau.csv", row.names = FALSE, fileEncoding = "UTF-8")
  • Export plots
saving_plot <- function(graph, name, width, height) {
  ggsave(file = glue("~/Downloads/SVG/{name}.svg"), plot = graph, width = width, height = height)
  ggsave(file = glue("~/Downloads/PNG/{name}.png"), plot = graph, width = width, height = height)
}
saving_plot(graph, "histogram", 9, 5)
  • Export html objects (leaflet map, plotly)
library(htmlwidgets)
saveWidget(map, file = "ma_carte.html")


R Markdown

  • Footer, CSS, header files in other folder
includes:
    in_header: !expr here::here("inst/rmarkdown/resources/header.html")
  • Logo / image at top of the document
# Logo haut de page
htmltools::img(src = "lien/vers/mon/image", 
               alt = 'logo', 
               style = 'position:absolute; top:0; right:0; width:400px') #width pour la taille! (ici positionné en haut à droite)
  • Output file into ‘reports’ folder
# Mettre dans le header du document RMD
knit: (
  function(inputFile, encoding) { 
    rmarkdown::render(inputFile, params = "ask",  
      encoding    = encoding,
      output_dir = "../reports", 
      output_file = paste0(tools::file_path_sans_ext(inputFile), ".html")) })
  • Justify text
# Mettre en corps de texte du RMD ou dans un fichier CSS à part
<style>
body {
text-align: justify
}
</style> 
  • Change font, color and indent text
# Mettre en corps de texte du RMD 
<p style="margin-left: 20px; font-size: 2em; color: #304B95;">**Simulateur**</p>
  • Custom dfsummary
print(dfSummary(data_summary, style = "grid", graph.magnif = 1, 
                valid.col = FALSE, varnumbers = FALSE, tmp.img.dir = "/tmp", 
                max.distinct.values = 5, headings = FALSE, method = "render", 
                col.widths  = c(300, 200, 100, 50, 20)),
      max.tbl.height = 600,
      method = "render")
  • Use installed font family
library(showtext)
font_add("Nexa", regular = "Nexa Bold.otf")
font_add("Trade Gothic", regular = "Trade Gothic.otf")
showtext_auto()
# then specify on the CSS the name of the font
  • TOC
output:
    rmarkdown::html_document:
      toc: true
      toc_float: true
      toc_depth: 2
      number_sections: true
# {-} après certains titres si on veut enlever le numérotage automatique pour ceux-là
  • Python on Rmd
# En début de Rmd
library(reticulate)

# Première utilisation
py_install("pandas") #pandas
pip install plotly #plotly, à runer dans le terminal

# Pour utiliser un environnement virtuel
Sys.setenv(RETICULATE_PYTHON = "path/to/python.exe") 
virtualenv_create("test_proj")
py_install("pandas", envname = "test_proj", method = "auto")
use_virtualenv("test_proj")
#```{python}
import pandas
import plotly.express as px
matrice = [[43, 57], [12, 88]]
fig = px.imshow(matrice)
fig.show()
#```
  • Remove blank space end RMD file
<div class = "tocify-extend-page" data-unique = "tocify-extend-page" style = "height: 0;"></div>
  • RMD collaboratif
# Déposer Rmd sur GDrive pour travailler en collaboration   
    # LE METTRE DANS UN DOSSIER TRACKDOWN ET LE NOM EN LIGNE DOIT GARDER L'EXTENSION .RMD
trackdown::upload_file(file = "scripts/Rapport_final.Rmd", gfile = "Rapport_final.Rmd")
trackdown::download_file(file = "scripts/Rapport_final.Rmd", gfile = "Rapport_final.Rmd")
  • Insert and center image in Rmd page
<p align="center">
 <img src="../figures/graphique.png" width = "110">
</p>
    #ou
![](../figures/graphique.png){fig-align="center"} #en corps de texte
  • Embed HTML in Rmd page

Code à mettre en corps de texte pour que ça run :

# Embed centré, html en ligne (ex: carte ODS)
<div align="center">
<iframe frameborder="0" width="800" height="600" src="lien/vers/mon/graphique"></iframe>
</div>

# HTML en local
<iframe src="Save_cartos/carte_p17_t1.html" height="600" width="1000" style="border: 0px solid #464646;" allowfullscreen="" allow="autoplay" data-external="1"></iframe>

Attention bien mettre “self_contained: false” dans le header. Ouvrir dans un browser pour voir le résultat.

Autre solution :

knitr::include_url(glue('../figures/tableau_global/tableau_global_{name_salarie}.html'))

Mettre ça dans <style> en corps de texte du RMD pour enlever la bordure noire.

iframe {
  border: none;
}
  • Documentation

Pimp my rmd, Yan Holtz


Git

  • Cancel last commit NOT pushed
git reset HEAD~1
  • Cancel last commit pushed
git revert HEAD
  • Merge diverging branches
git config pull.rebase false
  • Stash local changes for specific file
git stash push
git stash push scripts/tableau_de_bord.html #specific file

git stash push data/indicateurs_Sarah/CRM_odoo_dashboard.csv
git stash push data/indicateurs_Sarah/Suivi_budget_previsionnel.html
git stash push data/indicateurs_Sarah/table_budget_previsionnel.csv
git stash push data/indicateurs_Sarah/table_tjh.csv
  • Stash local changes for all files
git stash # annuler le dernier commit
git stash push --include-untracked # supprimer tous les changements locaux (pull possible ensuite)

Quand message “needs merge” :

  • sélectionner les fichiers en conflit (ex : data/table_tjh.csv)
  • les commit
  • push


Github actions

  • Install package from github (not from CRAN)
  1. Télécharger le repo contenant le package via l’URL : https://github.com/account/reponame/archive/branchname.tar.gz
  2. Le sauvegarder dans un dossier du repo de travail actuel
  3. L’installer via cette commande à ajouter au .yaml de l’action (name: install icons package, sans oublier d’installer le package remotes)
# Intégrer dans .github/workflows/render-document.yaml
      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          cache-version: 2
          packages:
            any::knitr
            any::tidyverse
            any::flexdashboard
            any::remotes
            
      - name: install icons package
        run: |
          Rscript -e "remotes::install_url('https://raw.githubusercontent.com/datactivist/plans_de_charge/main/.github/workflows/master.tar.gz')"


Divers


  • Bash commands in R
system(glue("cat {in_dir}/raw_data_{year}.jsonl | jq -c '{{doi, year, bso_classification, hal_id}}' | jq --slurp > {out_dir}/unnested_data_{year}.json"))
  • Parallel execution command
library(parallel)
library(doParallel)
library(foreach)
years <- 2013:2020

numCores <- 2
registerDoParallel(numCores)

foreach (year = years) %dopar% {
  ma_fonction(year)
}

stopImplicitCluster()
  • Execution time of a command
start.time <- Sys.time()
# R code here
end.time <- Sys.time()
round(end.time - start.time,2)


Icônes

  • Icons pages

https://fontawesome.com/search?o=r&m=free

https://ionic.io/ionicons

https://jpswalsh.github.io/academicons/

  • Icons into rmarkdwon

, meilleure solution (bibliothèque d’icones).

#`r icon_style(emojifont::fontawesome("play", style = NULL), fill = "#0000CC")`, implique ce chunk en début de Rmd ([bibliothèque d'icones](https://fontawesome.com/v4/icons/)) :


#remotes::install_github("mitchelloharawild/icons", force = TRUE)
library(icons)
#download_fontawesome()
library(extrafont)
  • Illustrations libres de droits

https://cocomaterial.com/

Fonctions créées

  • Export object created within function
assign(glue("n_{year}"), n, envir = .GlobalEnv)
  • Detach created object from environment
rm(ls = inter_bv, ratio, area_commune, area_2017)
  • Possibly function treatment
tryCatch(ma_fonction(data), error = function(e) NULL)
  • Use function saved in other directory
source(here("functions", "match_commune.R"))
object <- memoise::memoise(match_commune, cache = memoise::cache_filesystem(here("cache")))
  • Call variable within created function
fonction <- function(data, variable){
    data |> filter({{variable}} == 2)
}
  • Rename column wihtin created function
fonction <- function(new_cols, cols){
    data |> rename({{ new_cols }} := {{ cols }}) 
}
  • Huge utilisation created function
library(purrr)
purrr::map(.x = c(13:16, 18, 26:38, 40:44, 46, 49, 55, 66:68, 70, 84:90, 93:96, 99:108, 111:113, 115, 139, 162:178, 180, 181, 188:197),
          .f = ~table_recap_simple(.x))
table_recap_stat <- rbind(lapply(ls(pattern="^all_stat_"), function(x) get(x))) |> 
    bind_rows()
  • Create column if it does not already exist
datat <- data |> mutate(`NA` = ifelse("NA" %in% names(data), `NA`, "0%"))
  • “Error in auto_copy(): ! x and y must share the same src”

Dans une fonction créée, lorsqu’une base de données est appelée sans être mise comme argument attendu, il faut l’ajouter comme argument de fonction.

  • Get statistics within function
inter_categories <- function(data, variable){
    # Calcul
    min_val <- min(data[[variable]], na.rm = TRUE) #data[[variable]] pour accéder à la variable dans un df
    q1 <- quantile(data[[variable]], 0.25, na.rm = TRUE)
    med <- median(data[[variable]], na.rm = TRUE)
    q3 <- quantile(data[[variable]], 0.75, na.rm = TRUE)
    max_val <- max(data[[variable]], na.rm = TRUE)
    
    # Assignation à l'environnement global
    assign("min_val", min_val, envir = .GlobalEnv) #nom d'objet "min_val" et pas "min" pour pas crééer de conflit avec le nom de fonction
    assign("q1", q1, envir = .GlobalEnv)
    assign("med", med, envir = .GlobalEnv)
    assign("q3", q3, envir = .GlobalEnv)
    assign("max_val", max_val, envir = .GlobalEnv)
}
inter_categories(table, "Densité (habitant par m²)") #nom de variable entre guillemets et pas backticks sinon est considéré comme objet à part et non une variable du jeu de données
  • Automatic tab for elements of a list in a flexdashboard document
# voir script ici [repo privé] : https://github.com/datactivist/plans_de_charge/blob/main/scripts/tableau_de_bord.Rmd
# ATTENTION : ne pas mettre de caractères spéciaux (&.) dans les noms data-navmenu=""
 

Document sous licence ouverte réalisé par Diane Thierry

diane@datactivist.coop